*This file is designed to take the married couple and single person Census files and source country data and produce a dataset of immigrants for analysis
* Session setup: empty memory, set the memory allocation, switch off output
* paging and point $path at the project folder.
clear
set mem 4g
set more off
global path "u:\user3\klp27\Blau and Kahn\Immigration"

* Build the source country dataset from finaldata
use "$path\finaldata"
*replace bpld==10000 if bpld==9900
* Shorten the names of the three gdpcon-based variables
rename i_gdpcon i_gdp
rename i_gdpconcap i_gdpcap
rename gdpconcap gdpcap
* Retain birthplace codes of 10000 and above (observations with a missing code
* are retained as well, because missing compares as larger than any number)
keep if bpld>=10000
* Retain the five-year points from 1950 through 2000
keep if inlist(year, 1950, 1955, 1960, 1965, 1970, 1975, 1980, 1985, 1990, 1995, 2000)
drop num1865cur-wgt1865arr

* Region code (see Blau, Kahn, Moriarty and Souza (2003) p. 442).
* Step 1: start from the first two digits of final_code.
gen reg_code=real(substr(string(final_code),1,2))
* Step 2: shift values of 25 and above up by 10, then keep only the leading digit.
replace reg_code=reg_code+10 if reg_code>=25
replace reg_code=real(substr(string(reg_code),1,1))
* Step 3: the West Indies group is always region 3.
replace reg_code=3 if final_label=="West Indies"
* Step 4: open up slot 6 by moving regions 6 and above one place up ...
replace reg_code=reg_code+1 if reg_code>=6
* ... and assign slot 6 to the listed code ranges.
replace reg_code=6 if final_code==52200 | inrange(final_code,53000,54400) | inrange(final_code,60011,60014)
* Step 5: two individual codes are placed by hand (16040 -> 5, 16020 -> 8).
replace reg_code=cond(final_code==16040,5,8) if inlist(final_code,16040,16020)
label define reg_code 1 "North America" 2 "Central America" 3 "Carribean" 4 "South America" 5 "Europe" 6 "Middle East" 7 "Asia" 8 "Africa" 9 "Oceania"
label values reg_code reg_code

* Write four lookup copies of the source country data, each sorted on its merge key:
*   current date           - measures suffixed curr, keyed on bpld year
*   arrival date           - measures suffixed arr,  keyed on bpld yrimmig2
*   arrival date (spouse)  - as above with every variable prefixed sp
*   current date (spouse)  - sp-prefixed codes/labels and spXcurr measures
* Every save uses the replace option, like the other saves in this file, so that
* the script can be rerun after a run that stopped before the erase commands
* near the end of the merge section removed these intermediate files.
preserve
for var fertrate-reg_code: rename X Xcurr
sort bpld year
save "$path\Source country data current date", replace
restore
preserve
rename year yrimmig2
for var fertrate-reg_code: rename X Xarr
sort bpld yrimmig2
save "$path\Source country data arrival date", replace
* reg_code is called reg_codearr at this point; the full name is spelled out so
* that the range does not depend on variable-name abbreviation.
for var bpld-reg_codearr: rename X spX
sort spbpld spyrimmig2
save "$path\Source country data arrival date (spouse)", replace
restore
* The data in memory are left in their spouse/current-date form after this save.
for var final_label final_code bpld: rename X spX
for var fertrate-reg_code: rename X spXcurr
sort spbpld year
save "$path\Source country data current date (spouse)", replace

* For each census year write two small lookup files holding only the birthplace
* code and that year's GDP per capita (own and spouse versions).
forvalues yr=1980(10)2000 {
  * Own version: bpld + gdpcap<year>
  use "$path\Source country data current date", clear
  keep if year==`yr'
  keep bpld gdpcapcurr
  rename gdpcapcurr gdpcap`yr'
  save "$path\Source country data current date `yr'", replace

  * Spouse version: spbpld + spgdpcap<year>
  use "$path\Source country data current date (spouse)", clear
  keep if year==`yr'
  keep spbpld spgdpcapcurr
  rename spgdpcapcurr spgdpcap`yr'
  save "$path\Source country data current date `yr' (spouse)", replace
}

*Create updated country group codes
* Reads the country group code table, folds a number of small countries into
* larger labelled groups and gives each group one code: the smallest IPUMS code
* among its members.
use "$path\Country group codes.dta", clear
rename bpld ipums_code
*Combine small countries with little source country data
replace final_label="West Indies" if ipums_code==16010 | ipums_code==26042 | (ipums_code>=26045 & ipums_code<=26059)
replace final_label="Guiana" if ipums_code==30035 | ipums_code==30055
replace final_label="Switzerland" if ipums_code==42200
replace final_label="Spain" if ipums_code==43100
replace final_label="Italy" if ipums_code==43900
replace final_label="Pacific Islands" if ipums_code==71023 | ipums_code==71024 | ipums_code==71025
sort final_label ipums_code
* final_code stays missing for rows whose drop flag is not 0
by final_label: egen final_code=min(ipums_code) if drop==0
sort ipums_code
drop ipums_label grouped_label drop
* replace lets the script be rerun after a run that stopped before this
* intermediate file was erased.
* NOTE(review): in the visible part of this script the file is erased again
* without being read - confirm whether anything else uses it.
save "$path\Augmented country group codes", replace

* Start of the individual-level section: reset the session, then stack the
* 1980, 1990 and 2000 census analysis files into one dataset.
* The path global is set again here with the same value as at the top of the file.
clear
global path "u:\user3\klp27\Blau and Kahn\Immigration"
clear
set mem 6g
set more off
use "$path\Census 1980 Analysis data"
append using "$path\Census 1990 Analysis data"
append using "$path\Census 2000 Analysis data"

* Remove every variable stored between countrynoarr and spreg_codecurr.
* NOTE(review): presumably these are source country variables from an earlier
* merge that are re-attached below - confirm against the analysis files.
drop countrynoarr-spreg_codecurr
/*
gen yrimmig2=.
replace yrimmig2=1950 if impre50==1
replace yrimmig2=1955 if im5059==1
replace yrimmig2=1960 if im6064==1
replace yrimmig2=1965 if im6569==1
replace yrimmig2=1970 if im7074==1
replace yrimmig2=1975 if im7579==1
replace yrimmig2=1980 if im8084==1
replace yrimmig2=1985 if im8590==1
replace yrimmig2=1990 if im9194==1
replace yrimmig2=1995 if im9500==1

gen spyrimmig2=.
replace spyrimmig2=1950 if spimpre50==1
replace spyrimmig2=1955 if spim5059==1
replace spyrimmig2=1960 if spim6064==1
replace spyrimmig2=1965 if spim6569==1
replace spyrimmig2=1970 if spim7074==1
replace spyrimmig2=1975 if spim7579==1
replace spyrimmig2=1980 if spim8084==1
replace spyrimmig2=1985 if spim8590==1
replace spyrimmig2=1990 if spim9194==1
replace spyrimmig2=1995 if spim9500==1
*/
* Attach source country data to each person and spouse (old-style merge syntax:
* both datasets must be sorted on the key, and _merge records the match status).
* Observations found only in the lookup file (_merge==2) are discarded each time.

* 1) Arrival-date measures for the person, keyed on birthplace and yrimmig2.
sort bpld yrimmig2
merge bpld yrimmig2 using "$path\Source country data arrival date"
* droppedcty = 1 when both keys are non-missing but the lookup had no matching row
gen droppedcty=(bpld<. & yrimmig2<. & _merge==1)
drop if _merge==2
drop _merge
*drop censusrate illegalrate i_censusrate
* 2) The same for the spouse.
sort spbpld spyrimmig2
merge spbpld spyrimmig2 using "$path\Source country data arrival date (spouse)"
* spdroppedcty = 1 when both spouse keys are non-missing but nothing matched
gen spdroppedcty=(spbpld<. & spyrimmig2<. & _merge==1)
drop if _merge==2
drop _merge
*drop spcensusrate spillegalrate spi_censusrate

* 3) GDP per capita in each census year (gdpcap1980/1990/2000 and the spouse
*    versions), keyed on birthplace only.
foreach year2 of numlist 1980 1990 2000 {
  sort bpld
  merge bpld using "$path\Source country data current date `year2'"
  drop if _merge==2
  drop _merge
  sort spbpld
  merge spbpld using "$path\Source country data current date `year2' (spouse)"
  drop if _merge==2
  drop _merge
}

* 4) Current-date measures, keyed on birthplace and the observation's census year.
sort bpld year
merge bpld year using "$path\Source country data current date"
drop if _merge==2
drop _merge
sort spbpld year
merge spbpld year using "$path\Source country data current date (spouse)"
drop if _merge==2
drop _merge

* Keep a single distance variable per partner: the current-date copy, renamed
* miles / spmiles; the arrival-date copy is discarded.
drop milesarr
rename milescurr miles
drop spmilesarr
rename spmilescurr spmiles

* Delete the intermediate lookup files written earlier in this script.
erase "$path\Augmented country group codes.dta"

* All remaining files share the same name stem.
local srcstem "$path\Source country data"
erase "`srcstem' arrival date.dta"
erase "`srcstem' arrival date (spouse).dta"
erase "`srcstem' current date.dta"
erase "`srcstem' current date (spouse).dta"
* Per-census-year files, own version first and then the spouse version
forvalues yr=1980(10)2000 {
  erase "`srcstem' current date `yr'.dta"
  erase "`srcstem' current date `yr' (spouse).dta"
}

* Restrict to couples in which both partners are aged 18-65
keep if inrange(age,18,65) & inrange(spage,18,65)

capture log close

* Quadratic age terms
gen agesq=age^2
gen spagesq=spage^2

* Census-year indicators
gen year00=(year==2000)
gen year90=(year==1990)

* Indicators for speakeng codes 3, 4 or 5
gen speakengwell=inlist(speakeng,3,4,5)
gen spspeakengwell=inlist(spspeakeng,3,4,5)

* Number of children at single ages 0, 1 and 2 (nchm* plus nchf* counts)
forvalues a=0/2 {
  gen nchild`a'=nchm`a'+nchf`a'
}

* Number of children in age bands; each triple is first age, last age, name suffix.
* The sum is written out as all nchm terms followed by all nchf terms.
foreach span in "0 2 u3" "3 5 35" "6 11 611" "12 17 1217" {
  tokenize `span'
  local boys nchm`1'
  local girls nchf`1'
  local second=`1'+1
  forvalues a=`second'/`2' {
    local boys `boys'+nchm`a'
    local girls `girls'+nchf`a'
  }
  gen nchild`3'=`boys'+`girls'
}

* Arrival-cohort dummies and years since migration are set to zero for
* non-immigrants (own variables by immigrant, spouse variables by spimmigrant)
foreach v of varlist impre50 im5059 im6064 im6569 im7074 im7579 im8084 im8590 im9194 im9500 ysm1 {
  replace `v'=0 if immigrant==0
  replace sp`v'=0 if spimmigrant==0
}

* Quadratic years-since-migration terms
gen ysmsq=ysm1^2
gen spysmsq=spysm1^2
/*gen ear15ratioarr=ear15farr/ear15marr
gen spear15ratioarr=spear15farr/spear15marr
gen ear15ratiocurr=ear15fcurr/ear15mcurr
gen spear15ratiocurr=spear15fcurr/spear15mcurr

gen ear15ratioarr=ear15ffillarr/ear15mfillarr
gen spear15ratioarr=spear15ffillarr/spear15mfillarr
gen ear15ratiocurr=ear15ffillcurr/ear15mfillcurr
gen spear15ratiocurr=spear15ffillcurr/spear15mfillcurr*/

* Reduce the dataset to the variables used from here on. Several names appear
* more than once in the list, which is harmless; the emigration-rate variables
* are commented out inside the list. Everything not named here, including
* nchildu3 and the gdpcap1980/1990/2000 variables merged above, is discarded.
keep annhours spannhours impre50 im5059 im6064 im6569 im7074 im7579 im8084 im8590 im9194 im9500 ysm1 ysmsq spimpre50 spim5059 spim6064 spim6569 spim7074 spim7579 spim8084 spim8590 spim9194 spim9500 spysm1 spysmsq year90 year00 age agesq educ2-educ4 speakengwell nchild0 nchild1 nchild2 nchild35 nchild611 nchild1217 rhisp1-rhisp5 reg2-reg9 spage spagesq speduc2-speduc4 spspeakengwell sprhisp1-sprhisp5 fertratearr gdpcaparr ear15ratioarr proprefugarr spfertratearr spgdpcaparr spear15ratioarr spproprefugarr fertratecurr gdpcapcurr ear15ratiocurr proprefugcurr spfertratecurr spgdpcapcurr spear15ratiocurr spproprefugcurr perwt spperwt sex spsex year bpld spbpld immigrant spimmigrant final_code final_label spfinal_code spfinal_label yrimmig spyrimmig fertratearr gdpcaparr ear15ratioarr spfertratearr spgdpcaparr spear15ratioarr ear15ratiocurr spear15ratiocurr fertratecurr gdpcapcurr spfertratecurr spgdpcapcurr primarymarr primaryfarr primarymcurr primaryfcurr spprimarymarr spprimaryfarr spprimarymcurr spprimaryfcurr secondarymarr secondaryfarr secondarymcurr secondaryfcurr spsecondarymarr spsecondaryfarr spsecondarymcurr spsecondaryfcurr nchild marst spmarst stateicp quhrswor qwkswork spquhrswor spqwkswork qbpl qyrimm spqbpl spqyrimm englspkarr engloffarr spenglspkarr spengloffarr englspkcurr engloffcurr spenglspkcurr spengloffcurr ageatarr ageatarrmid spageatarr spageatarrmid incwage incbus classwkd lnw qincwage qincbus spincwage splnw spqincwage spincbus spclasswkd spqincbus incbus00 spincbus00 incfarm qincfarm spincfarm spqincfarm wkswork1 spwkswork1 wage spwage ownnonwageinc spownnonwageinc /*emigratetotarr emigratemarr emigratefarr i_emigratetotarr spemigratetotarr spemigratemarr spemigratefarr spi_emigratetotarr emigratetotcurr emigratemcurr emigratefcurr i_emigratetotcurr spemigratetotcurr spemigratemcurr spemigratefcurr spi_emigratetotcurr*/ miles spmiles gradecomp spgradecomp droppedcty spdroppedcty

* Education interactions: completed grade times immigrant status, for each partner
gen imgradecomp=gradecomp*immigrant
gen spimgradecomp=spgradecomp*spimmigrant

* Fold category 5 of the rhisp dummies into category 4 for both partners;
* only the person's own rhisp5 is removed afterwards.
replace rhisp4=1 if rhisp5==1
replace sprhisp4=1 if sprhisp5==1
drop rhisp5

* Combined non-wage income of the couple
gen nonwageinc=ownnonwageinc+spownnonwageinc

* Non-immigrants get country code 10000 and an empty country label
replace final_code=10000 if immigrant==0
replace final_label="" if immigrant==0
replace spfinal_code=10000 if spimmigrant==0
replace spfinal_label="" if spimmigrant==0

* Enrolment rates, distance and the education interaction are zero for non-immigrants
foreach x of varlist primarymarr primaryfarr primarymcurr primaryfcurr secondarymarr secondaryfarr secondarymcurr secondaryfcurr miles imgradecomp {
  replace `x'=0 if immigrant==0
  replace sp`x'=0 if spimmigrant==0
}
* Indicator variables. The order in which they are created matters: later code
* refers to the ranges y05-y40, y05-y2130, spy05-spy2130 and california-nj,
* which rely on these variables being stored next to each other in this order.

* Married indicator (marst code 1) for each partner
generate married=marst==1
generate spmarried=spmarst==1
* One dummy per value taken by ysm1 (2.5, 8, 13, 18, 25.5, 40)
generate y05=ysm1==2.5
generate y610=ysm1==8
generate y1115=ysm1==13
generate y1620=ysm1==18
generate y2130=ysm1==25.5
generate y40=ysm1==40

* The same dummies for the spouse, based on spysm1
generate spy05=spysm1==2.5
generate spy610=spysm1==8
generate spy1115=spysm1==13
generate spy1620=spysm1==18
generate spy2130=spysm1==25.5
generate spy40=spysm1==40

* State dummies from stateicp codes
generate california=stateicp==71
generate florida=stateicp==43
generate texas=stateicp==49
generate ny=stateicp==13
generate illinois=stateicp==21
generate nj=stateicp==12

* Region code for the person, derived from final_code: take the first two
* digits, add 10 when that is 25 or more, keep the leading digit, force the
* West Indies group to 3, move regions 6 and above up by one and assign
* region 6 to the listed code ranges.
* NOTE(review): the region-code block in the source country section earlier in
* this file additionally assigns final_code 52200 to region 6, 16040 to region 5
* and 16020 to region 8; those three overrides are absent here - confirm whether
* the difference is intended.
gen reg_code=.
replace reg_code=real(substr(string(final_code),1,2))
replace reg_code=reg_code+10 if reg_code>=25
replace reg_code=real(substr(string(reg_code),1,1))
replace reg_code=3 if final_label=="West Indies"
replace reg_code=reg_code+1 if reg_code>=6
replace reg_code=6 if (final_code>=53000 & final_code<=54400) | (final_code>=60011 & final_code<=60014)
*label define reg_code 1 "North America" 2 "Central America" 3 "Carribean" 4 "South America" 5 "Europe" 6 "Middle East" 7 "Asia" 8 "Africa" 9 "Oceania"
* NOTE(review): the label definition above is commented out; presumably a label
* named reg_code is already in memory from the merged lookup files - confirm.
label values reg_code reg_code

* The same derivation for the spouse, from spfinal_code / spfinal_label
gen spreg_code=.
replace spreg_code=real(substr(string(spfinal_code),1,2))
replace spreg_code=spreg_code+10 if spreg_code>=25
replace spreg_code=real(substr(string(spreg_code),1,1))
replace spreg_code=3 if spfinal_label=="West Indies"
replace spreg_code=spreg_code+1 if spreg_code>=6
replace spreg_code=6 if (spfinal_code>=53000 & spfinal_code<=54400) | (spfinal_code>=60011 & spfinal_code<=60014)
*label define spreg_code 1 "North America" 2 "Central America" 3 "Carribean" 4 "South America" 5 "Europe" 6 "Middle East" 7 "Asia" 8 "Africa" 9 "Oceania"
* NOTE(review): no label named spreg_code is defined in the visible code, so
* spreg_code will probably display as plain numbers - confirm.
label values spreg_code spreg_code

* Flags equal to 1 when both listed q-variables are zero
generate legalhrs=(quhrswor==0 & qwkswork==0)
generate splegalhrs=(spquhrswor==0 & spqwkswork==0)

generate legalimm=(qbpl==0 & qyrimm==0)
generate splegalimm=(spqbpl==0 & spqyrimm==0)

* English-language country measures: first set all four to 1 for non-immigrants ...
foreach v of varlist englspkarr englspkcurr engloffarr engloffcurr {
  replace `v'=1 if immigrant==0
}
foreach v of varlist spenglspkarr spenglspkcurr spengloffarr spengloffcurr {
  replace `v'=1 if spimmigrant==0
}

* ... then switch engloff* off wherever the matching englspk* is 1
foreach t in arr curr {
  replace engloff`t'=0 if englspk`t'==1
}
foreach t in arr curr {
  replace spengloff`t'=0 if spenglspk`t'==1
}

* Person weights rescaled to sum to one within each census year
egen sumwt=sum(perwt), by(year)
generate corewgt=perwt/sumwt
egen spsumwt=sum(spperwt), by(year)
generate spcorewgt=spperwt/spsumwt

* Business/farm income and its q-variable: the sum of the business and farm
* items before 2000; in 2000 incbus00 for the amount and qincbus for the q-variable
generate incbusfarm=cond(year==2000, incbus00, incbus+incfarm) if year<=2000

generate qincbusfarm=cond(year==2000, qincbus, qincbus+qincfarm) if year<=2000

generate spincbusfarm=cond(year==2000, spincbus00, spincbus+spincfarm) if year<=2000

generate spqincbusfarm=cond(year==2000, spqincbus, spqincbus+spqincfarm) if year<=2000


* Log wage: blanked when outside [log(1), log(200)], when any of the wage,
* hours or weeks q-variables is positive, or when there is any business/farm
* income, a positive business/farm q-variable or class of worker code 13 or 14
generate lnwage=lnw
replace lnwage=. if lnwage<log(1) | lnwage>log(200)
replace lnwage=. if qincwage>0 | quhrswor>0 | qwkswork>0
replace lnwage=. if incbusfarm~=0 | qincbusfarm>0 | inlist(classwkd,13,14)

generate splnwage=splnw
replace splnwage=. if splnwage<log(1) | splnwage>log(200)
replace splnwage=. if spqincwage>0 | spquhrswor>0 | spqwkswork>0
replace splnwage=. if spincbusfarm~=0 | spqincbusfarm>0 | inlist(spclasswkd,13,14)

* Interact each source country measure with the assimilation terms.
* Earlier, shorter variable lists (kept for reference):
*   fertratearr gdpcaparr ear15ratioarr proprefugarr
*   fertratecurr gdpcapcurr ear15ratiocurr proprefugcurr
* The arrival-date measures (plus miles and imgradecomp) come first, followed by
* the current-date measures. For every measure x the loop
*   - sets x and spx to zero for non-immigrants,
*   - creates ysm1x and ysmsqx, then spysm1spx and spysmsqspx,
*   - creates the six period-dummy interactions y05x ... y40x,
*   - creates the six spouse interactions spy05x ... spy40x (spouse dummy times spx).
foreach x of varlist fertratearr gdpcaparr ear15ratioarr proprefugarr englspkarr engloffarr miles imgradecomp fertratecurr gdpcapcurr ear15ratiocurr proprefugcurr englspkcurr engloffcurr {
  replace `x'=0 if immigrant==0
  replace sp`x'=0 if spimmigrant==0
  foreach t in ysm1 ysmsq {
    gen `t'`x'=`t'*`x'
  }
  foreach t in ysm1 ysmsq {
    gen sp`t'sp`x'=sp`t'*sp`x'
  }
  foreach b in 05 610 1115 1620 2130 40 {
    gen y`b'`x'=y`b'*`x'
  }
  foreach b in 05 610 1115 1620 2130 40 {
    gen spy`b'`x'=spy`b'*sp`x'
  }
}

* Enrolment rate interactions: every period dummy in the range y05-y40 is
* multiplied by the female enrolment rates; the matching spouse dummy is
* multiplied by the spouse's male enrolment rates.
foreach ydum of varlist y05-y40 {
  foreach enrol of varlist primaryfarr secondaryfarr primaryfcurr secondaryfcurr {
    gen `ydum'`enrol'=`ydum'*`enrol'
  }
  foreach enrol of varlist primarymarr secondarymarr primarymcurr secondarymcurr {
    gen sp`ydum'`enrol'=sp`ydum'*sp`enrol'
  }
}

* Arrival cohort index 1-10 built from the cohort dummies
gen cohort=impre50+2*im5059+3*im6064+4*im6569+5*im7074+6*im7579+7*im8084+8*im8590+9*im9194+10*im9500
gen spcohort=spimpre50+2*spim5059+3*spim6064+4*spim6569+5*spim7074+6*spim7579+7*spim8084+8*spim8590+9*spim9194+10*spim9500

* Age at arrival measure: current age minus a fixed number of years that depends
* on the ysm1 category (5, 10, 15, 20 or 30). It is set to 0 when ysm1 is 40 or
* the result is negative, and to missing for anyone who is not an immigrant.
gen ageatarrmin=age-5 if ysm1==2.5
replace ageatarrmin=age-10 if ysm1==8
replace ageatarrmin=age-15 if ysm1==13
replace ageatarrmin=age-20 if ysm1==18
replace ageatarrmin=age-30 if ysm1==25.5
replace ageatarrmin=0 if ysm1==40 | ageatarrmin<0
replace ageatarrmin=. if immigrant~=1

* Same construction for the spouse
gen spageatarrmin=spage-5 if spysm1==2.5
replace spageatarrmin=spage-10 if spysm1==8
replace spageatarrmin=spage-15 if spysm1==13
replace spageatarrmin=spage-20 if spysm1==18
replace spageatarrmin=spage-30 if spysm1==25.5
replace spageatarrmin=0 if spysm1==40 | spageatarrmin<0
replace spageatarrmin=. if spimmigrant~=1

* Estimation sample flag. A couple is in the sample when
*   - both rescaled weights are positive and both partners are aged 18-65,
*   - each partner is either a non-immigrant or an immigrant with a country
*     code and a non-zero yrimmig,
*   - legalhrs, legalimm and the spouse versions are all 1,
*   - no immigrant partner has an age-at-arrival measure below 18 (this removes
*     immigrants with ysm1 equal to 40, whose measure was set to 0 above),
*   - neither partner's country failed to match the source country data.
gen sample=0
replace sample=1 if corewgt>0 & spcorewgt>0 & age>=18 & age<=65 & spage>=18 & spage<=65 & (immigrant==0 | (immigrant==1 & final_code~=. & yrimmig~=0)) & (spimmigrant==0 | (spimmigrant==1 & spfinal_code~=. & spyrimmig~=0))
replace sample=0 if legalhrs==0 | legalimm==0
replace sample=0 if splegalhrs==0 | splegalimm==0
replace sample=0 if (immigrant==1 & ageatarrmin<18) | (spimmigrant==1 & spageatarrmin<18)
replace sample=0 if droppedcty==1 | spdroppedcty==1

* Remove the ysm1/ysmsq interactions with distance; their names would otherwise
* match the y*miles and spy*miles wildcards used in the regressions below.
drop ysm1miles ysmsqmiles spysm1spmiles spysmsqspmiles

*Wage-imputing regressions
*Wives
* This section works on the unprefixed variables (lnw, wkswork1, ...); the
* sp-prefixed counterparts are handled in the Husbands section.
* lowhrs = 1 when weeks worked (wkswork1) is below 20 or missing
gen lowhrs=0
replace lowhrs=1 if wkswork1<20 | wkswork1==.

*This indicator also includes those who are allocated SE income
* seind = 1 with non-zero, non-missing business/farm income, class of worker
* code 13 or 14, or a positive business/farm q-variable
gen seind=0
replace seind=1 if incbusfarm~=0 & incbusfarm<.
replace seind=1 if classwkd==13 | classwkd==14
replace seind=1 if qincbusfarm>0

* allocwsinc = 1 when qincwage is positive
gen allocwsinc=0
replace allocwsinc=1 if qincwage>0

* wageotlr = 1 when a non-missing wage lies outside the range 1 to 200
gen wageotlr=0
replace wageotlr=1 if (wage<1|wage>200) & wage~=.

* lnwimp starts as the observed log wage and is overwritten with predictions below
gen lnwimp=lnw

* For each census year, and separately for lowhrs==1 and lowhrs==0, regress lnw
* on the covariates using sample members who are not outliers, not allocated
* and not self-employed; the fitted value then replaces lnwimp for anyone in
* that year and lowhrs group with zero weeks worked, an outlier wage, an
* allocated wage or the self-employment flag. Only the C Regressions lines are
* active; the other specifications are commented out.
* NOTE(review): the wildcard terms y*educ? and spy*educ? in the active
* regressions need matching variables, and none are created or kept in the
* visible part of this file. If none exist, reg stops with a variable-not-found
* error - confirm.
foreach year of numlist 1980 1990 2000 {
  *OLD LINE  reg lnw age agesq gradecomp rhisp2-rhisp4 spage spagesq spgradecomp sprhisp2-sprhisp4 reg2-reg9 metro year2-year3 [aw=normwt] if sample==1 & lowhrs==1 & wageotlr==0 & i_wsval==0 & seind==0
  *reg lnw im5059 im6064 im6569 im7074 im7579 im8084 im8590 im9194 im9500 y05-y40 age agesq educ2-educ4 rhisp2-rhisp4 /*nchild0 nchild1 nchild2 nchild35 nchild611 nchild1217*/ fertratearr gdpcaparr ear15ratioarr proprefugarr primaryfarr secondaryfarr englspkarr engloffarr spim5059 spim6064 spim6569 spim7074 spim7579 spim8084 spim8590 spim9194 spim9500 spy05-spy40 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 /*nchild0 nchild1 nchild2 nchild35 nchild611 nchild1217*/ spfertratearr spgdpcaparr spear15ratioarr spproprefugarr spprimaryfarr spsecondaryfarr spenglspkarr spengloffarr reg2-reg9 california-nj [aw=corewgt] if year==`year' & lowhrs==1 & wageotlr==0 & allocwsinc==0 & seind==0 & immigrant==1, cluster(final_code)
  *reg lnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 reg2-reg9 california-nj [aw=corewgt] if year==`year' & lowhrs==1 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  *New line added April 3, 2007:
  *reg lnw age agesq gradecomp rhisp2-rhisp4 spage spagesq spgradecomp sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=corewgt] if sample==1 & year==`year' & lowhrs==1 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  *A Regressions
  *reg lnw age agesq rhisp2-rhisp4 spage spagesq sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=corewgt] if sample==1 & year==`year' & lowhrs==1 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  *B Regressions
  *reg lnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=corewgt] if sample==1 & year==`year' & lowhrs==1 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  *C Regressions
  reg lnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles y*educ? spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles spy*educ? reg2-reg9 california-nj [aw=corewgt] if sample==1 & year==`year' & lowhrs==1 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  predict lnwpred
  replace lnwimp=lnwpred if (wkswork1==0 | wageotlr==1 | allocwsinc==1 | seind==1) & lowhrs==1 & year==`year'
  drop lnwpred

  *reg lnw im5059 im6064 im6569 im7074 im7579 im8084 im8590 im9194 im9500 y05-y40 age agesq educ2-educ4 rhisp2-rhisp4 /*nchild0 nchild1 nchild2 nchild35 nchild611 nchild1217*/ fertratearr gdpcaparr ear15ratioarr proprefugarr primaryfarr secondaryfarr englspkarr engloffarr spim5059 spim6064 spim6569 spim7074 spim7579 spim8084 spim8590 spim9194 spim9500 spy05-spy40 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 /*nchild0 nchild1 nchild2 nchild35 nchild611 nchild1217*/ spfertratearr spgdpcaparr spear15ratioarr spproprefugarr spprimaryfarr spsecondaryfarr spenglspkarr spengloffarr reg2-reg9 california-nj [aw=corewgt] if year==`year' & lowhrs==0 & wageotlr==0 & allocwsinc==0 & seind==0 & immigrant==1, cluster(final_code)
  *reg lnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 reg2-reg9 california-nj [aw=corewgt] if year==`year' & lowhrs==0 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  *New line added April 3, 2007:
  *reg lnw age agesq gradecomp rhisp2-rhisp4 spage spagesq spgradecomp sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=corewgt] if sample==1 & year==`year' & lowhrs==0 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  *A Regressions
  *reg lnw age agesq rhisp2-rhisp4 spage spagesq sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=corewgt] if sample==1 & year==`year' & lowhrs==0 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  *B Regressions
  *reg lnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=corewgt] if sample==1 & year==`year' & lowhrs==0 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  *C Regressions
  reg lnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles y*educ? spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles spy*educ? reg2-reg9 california-nj [aw=corewgt] if sample==1 & year==`year' & lowhrs==0 & wageotlr==0 & allocwsinc==0 & seind==0, cluster(final_code)
  predict lnwpred
  replace lnwimp=lnwpred if (wkswork1==0 | wageotlr==1 | allocwsinc==1 | seind==1) & lowhrs==0 & year==`year'
  drop lnwpred
}

*Husbands
* Same construction as the Wives section, applied to the sp-prefixed variables.
* splowhrs = 1 when spouse weeks worked (spwkswork1) is below 20 or missing
gen splowhrs=0
replace splowhrs=1 if spwkswork1<20 | spwkswork1==.

*This indicator also includes those who are allocated SE income
* spseind = 1 with non-zero, non-missing business/farm income, class of worker
* code 13 or 14, or a positive business/farm q-variable
gen spseind=0
replace spseind=1 if spincbusfarm~=0 & spincbusfarm<.
replace spseind=1 if spclasswkd==13 | spclasswkd==14
replace spseind=1 if spqincbusfarm>0

* spallocwsinc = 1 when spqincwage is positive
gen spallocwsinc=0
replace spallocwsinc=1 if spqincwage>0

* spwageotlr = 1 when a non-missing spwage lies outside the range 1 to 200
gen spwageotlr=0
replace spwageotlr=1 if (spwage<1|spwage>200) & spwage~=.

* splnwimp starts as the observed log wage and is overwritten with predictions below
gen splnwimp=splnw

* For each census year, and separately for splowhrs==1 and splowhrs==0, regress
* splnw on the covariates (weights spcorewgt, clustering on spfinal_code) and
* use the fitted value for anyone in that year and group with zero weeks
* worked, an outlier wage, an allocated wage or the self-employment flag.
* Only the C Regressions lines are active.
* NOTE(review): the wildcard terms y*educ? and spy*educ? in the active
* regressions need matching variables, and none are created or kept in the
* visible part of this file. If none exist, reg stops with a variable-not-found
* error - confirm.
foreach year of numlist 1980 1990 2000 {
  *OLD LINE  reg splnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 reg2-reg9 metro year2-year3 [aw=normwt] if sample==1 & splowhrs==1 & spwageotlr==0 & spi_wsval==0 & spseind==0
  *reg splnw im5059 im6064 im6569 im7074 im7579 im8084 im8590 im9194 im9500 y05-y40 age agesq educ2-educ4 rhisp2-rhisp4 /*nchild0 nchild1 nchild2 nchild35 nchild611 nchild1217*/ fertratearr gdpcaparr ear15ratioarr proprefugarr primaryfarr secondaryfarr englspkarr engloffarr spim5059 spim6064 spim6569 spim7074 spim7579 spim8084 spim8590 spim9194 spim9500 spy05-spy40 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 /*nchild0 nchild1 nchild2 nchild35 nchild611 nchild1217*/ spfertratearr spgdpcaparr spear15ratioarr spproprefugarr spprimaryfarr spsecondaryfarr spenglspkarr spengloffarr reg2-reg9 california-nj [aw=spcorewgt] if year==`year' & splowhrs==1 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  *reg splnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 reg2-reg9 california-nj [aw=spcorewgt] if year==`year' & splowhrs==1 & spwageotlr==0 & spallocwsinc==0 & spseind==0 & spimmigrant==1, cluster(spfinal_code)
  *New line added April 3, 2007:
  *reg splnw age agesq gradecomp rhisp2-rhisp4 spage spagesq spgradecomp sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=spcorewgt] if sample==1 & year==`year' & splowhrs==1 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  *A Regressions
  *reg splnw age agesq rhisp2-rhisp4 spage spagesq sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=spcorewgt] if sample==1 & year==`year' & splowhrs==1 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  *B Regressions
  *reg splnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=spcorewgt] if sample==1 & year==`year' & splowhrs==1 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  *C Regressions
  reg splnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles y*educ? spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles spy*educ? reg2-reg9 california-nj [aw=spcorewgt] if sample==1 & year==`year' & splowhrs==1 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  predict splnwpred
  replace splnwimp=splnwpred if (spwkswork1==0 | spwageotlr==1 | spallocwsinc==1 | spseind==1) & splowhrs==1 & year==`year'
  drop splnwpred

  *reg splnw im5059 im6064 im6569 im7074 im7579 im8084 im8590 im9194 im9500 y05-y40 age agesq educ2-educ4 rhisp2-rhisp4 /*nchild0 nchild1 nchild2 nchild35 nchild611 nchild1217*/ fertratearr gdpcaparr ear15ratioarr proprefugarr primaryfarr secondaryfarr englspkarr engloffarr spim5059 spim6064 spim6569 spim7074 spim7579 spim8084 spim8590 spim9194 spim9500 spy05-spy40 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 /*nchild0 nchild1 nchild2 nchild35 nchild611 nchild1217*/ spfertratearr spgdpcaparr spear15ratioarr spproprefugarr spprimaryfarr spsecondaryfarr spenglspkarr spengloffarr reg2-reg9 california-nj [aw=spcorewgt] if year==`year' & splowhrs==0 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  *reg splnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 reg2-reg9 california-nj [aw=spcorewgt] if year==`year' & splowhrs==0 & spwageotlr==0 & spallocwsinc==0 & spseind==0 & spimmigrant==1, cluster(spfinal_code)
  *New line added April 3, 2007:
  *reg splnw age agesq gradecomp rhisp2-rhisp4 spage spagesq spgradecomp sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=spcorewgt] if sample==1 & year==`year' & splowhrs==0 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  *A Regressions
  *reg splnw age agesq rhisp2-rhisp4 spage spagesq sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=spcorewgt] if sample==1 & year==`year' & splowhrs==0 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  *B Regressions
  *reg splnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 california-nj [aw=spcorewgt] if sample==1 & year==`year' & splowhrs==0 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  *C Regressions
  reg splnw age agesq educ2-educ4 rhisp2-rhisp4 spage spagesq speduc2-speduc4 sprhisp2-sprhisp4 y05-y2130 y05*arr y610*arr y1115*arr y1620*arr y2130*arr y*miles y*educ? spy05-spy2130 spy05*arr spy610*arr spy1115*arr spy1620*arr spy2130*arr spy*miles reg2-reg9 spy*educ? california-nj [aw=spcorewgt] if sample==1 & year==`year' & splowhrs==0 & spwageotlr==0 & spallocwsinc==0 & spseind==0, cluster(spfinal_code)
  predict splnwpred
  replace splnwimp=splnwpred if (spwkswork1==0 | spwageotlr==1 | spallocwsinc==1 | spseind==1) & splowhrs==0 & year==`year'
  drop splnwpred
}

*Generate deciles by year
/*
for num 2/10: gen lnwpctX=0
foreach year of numlist 1980 1990 2000 {
_pctile lnwimp [aw=corewgt] if sample==1 & year==`year', nq(10)
scalar pct1=r(r1)
scalar pct2=r(r2)
scalar pct3=r(r3)
scalar pct4=r(r4)
scalar pct5=r(r5)
scalar pct6=r(r6)
scalar pct7=r(r7)
scalar pct8=r(r8)
scalar pct9=r(r9)
scalar pct10=.

for num 2/10 \ num 1/9: replace lnwpctX=cond(lnwimp>pctY & lnwimp<pctX,1,0) if year==`year'
}
*/
* Weighted deciles of the imputed log wage, computed within each census year on
* sample members only, followed by one dummy per decile (lnwpct1, lnwpct2, ...).
gen pctile=0
forvalues yr=1980(10)2000 {
  tempvar dec
  xtile `dec'=lnwimp [aw=corewgt] if sample==1 & year==`yr', nq(10)
  replace pctile=`dec' if year==`yr'
  drop `dec'
}
tab pctile, gen(lnwpct)

* Same for the spouse's imputed log wage (splnwpct1, splnwpct2, ...)
gen sppctile=0
forvalues yr=1980(10)2000 {
  tempvar spdec
  xtile `spdec'=splnwimp [aw=spcorewgt] if sample==1 & year==`yr', nq(10)
  replace sppctile=`spdec' if year==`yr'
  drop `spdec'
}
tab sppctile, gen(splnwpct)

* Decile dummies interacted with census year: for deciles 2-10 and each of the
* three years, the new variable equals the decile dummy in that year and zero
* in the other years. Own wage deciles first, then the spouse's.
foreach stub in lnwpct splnwpct {
  forvalues d=2/10 {
    forvalues yr=1980(10)2000 {
      gen `stub'`yr'`d'=cond(year==`yr', `stub'`d', 0)
    }
  }
}

* Remove helper variables that are not part of the regression dataset
drop ageatarrmin spageatarrmin sample /*y05primaryfarr-spy40secondarymcurr*/

save "$path\Regression dataset (married) (cts educ)", replace
